# NOTE(review): removes all visible (non-dot-prefixed) objects from the
# environment the script is run in. It does not detach packages or reset
# options, so it is not a substitute for a fresh R session; consider running
# the script in a new R process instead of relying on this line.
rm(list = ls())
## load package and data
library(tidyverse)
library(haven)
library(panelView)
library(readxl)
library(gridExtra)
library(stargazer)
library(ggpubr)
library(lfe)
library(interflex)
library(foreign)
# Read the main Stata data set from the replication folder, then keep a
# second copy restricted to rows with year > 2011.
final_df <- read_dta("~/Dropbox/political_risk/replication_files/pc_df_2022test.dta")

final_df2012 <- filter(final_df, year > 2011)

#Figure 2:Time Trend in Dismissal of Officials####

##### Time trend of Purge
library(ggpubr)

# Shared figure theme: ggpubr's theme_pubclean() at base size 15, with the
# legend title removed, the legend placed below the panel, and legend boxes
# stacked vertically. Anything passed in `...` is forwarded to theme().
theme_zl <- function(...) {
  base_theme <- theme_pubclean(base_size = 15)
  legend_tweaks <- theme(
    legend.title = element_blank(),
    legend.position = "bottom",
    legend.box = "vertical",
    legend.margin = margin(),
    ...
  )
  base_theme + legend_tweaks
}

# Number of rows in the anti-corruption workbook per year, restricted to
# non-missing years between 2000 and 2016 (inclusive). The result has two
# columns: `year` and the count `n`.
corruption <- read_excel("~/Dropbox/political_risk/replication_files/anti_corruption_wang.xlsx") %>%
  filter(!is.na(Year), Year %in% 2000:2016) %>%
  group_by(Year) %>%
  summarise(n = n()) %>%
  rename(year = Year)

# corruption$rank = ordered(corruption$rank, levels = c("Provincial", "Vice-Provincial", "Prefectural","Vice-Prefectural", "County", "Vice-County", "Section Chief and Below"))
# 

# Bar chart of the yearly counts in `corruption`; the bare `level` line
# displays the plot when the script is run line by line.
level <- ggplot(data = corruption, mapping = aes(x = year, y = n)) +
  geom_col(alpha = .8, color = "black") +
  labs(x = "", y = "Number of Officials") +
  theme_zl()
level
#ggsave( "/Users/zerenli1992/Dropbox/Apps/Overleaf/cost_revolving_door_2020/purge_time_trend.png")

###MAP OF PURGE DENSITY####
library(sf)
library(viridis)

# Theme for maps: theme_minimal() at base size 15 with axes, ticks, lines,
# panel background and plot margins removed, and a small legend placed below
# the map. Anything passed in `...` is forwarded to theme().
theme_map <- function(...) {
  map_overrides <- theme(
    line = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.background = element_blank(),
    plot.margin = margin(0, 0, 0, 0, "cm"),
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8),
    ...
  )
  theme_minimal(base_size = 15) + map_overrides
}

# map repository
path <- "~/Dropbox/political_risk/replication_files/map/dijishi_2004.shp"

# Total number of rows in the anti-corruption workbook per prefecture, pooled
# over all years (stored as double). `pref` is the prefecture name rewritten
# to the spelling that the recodes below target -- presumably the names used
# in the shapefile's NAME2004 field (verify against the shapefile).
data <- read_excel("~/Dropbox/political_risk/replication_files/anti_corruption_wang.xlsx") %>%
  group_by(prefecture) %>%
  summarise(purge_intensity = as.double(n())) %>%
  mutate(pref = case_when(prefecture == "上海" ~ "上海市市辖区",
                          prefecture == "北京" ~ "北京市市辖区",
                          prefecture == "重庆" ~ "重庆市市辖区",
                          prefecture == "天津" ~ "天津市市辖县",
                          prefecture == "襄阳市" ~ "襄樊市",
                          prefecture == "吐鲁番市" ~ "吐鲁番地区",
                          prefecture == "普洱市" ~ "思茅市",
                          prefecture == "日喀则市" ~ "日喀则地区",
                          TRUE ~ prefecture))

# Attach the counts to the map polygons. The join key is the recoded `pref`
# column: joining on the raw `prefecture` column (as before) left the recodes
# above unused, so the renamed prefectures never matched a polygon. Counts are
# first collapsed to one row per `pref` so that two source spellings mapping
# to the same polygon cannot duplicate map rows.
china_map <- st_read(path, options = "ENCODING=GBK") %>%
  left_join(
    data %>%
      group_by(pref) %>%
      summarise(purge_intensity = sum(purge_intensity)),
    by = c("NAME2004" = "pref")
  )

# Legend breaks for the log-scaled fill used in the map below.
my_breaks <- c(10, 100, 500)


# Choropleth of purge counts per polygon on a log fill scale (magma palette,
# reversed). The legend shows only the values in `my_breaks`.
# `labels` is spelled out in full; the previous `label =` only worked through
# partial argument matching.
map <- ggplot() +
  geom_sf(data = china_map, aes(fill = purge_intensity), alpha = .8, color = NA) +
  ylim(17, NA) +
  theme_map() +
  scale_fill_viridis(option = "magma",
                     trans = "log",
                     breaks = my_breaks,
                     labels = my_breaks,
                     direction = -1,
                     name = "")


# Time-trend bar chart and map side by side.
ggarrange(level, map)
#ggsave( "/Users/zerenli1992/Dropbox/Apps/Overleaf/cost_revolving_door_2020/purge_instensity.png")

#Figure 3:Time Trend of RD Officials####

# Line-and-point plot of `n` against `year`, with an x-axis tick every second
# year from the first to the last year in the data.
rd_t_trend <- read_rds("~/Dropbox/revolving_door_jmp/replication_files/rd_t_trend.rds")
ggplot(data = rd_t_trend, mapping = aes(x = year, y = n)) +
  geom_line() +
  geom_point(size = 3) +
  scale_x_continuous(
    breaks = seq(from = min(rd_t_trend$year), to = max(rd_t_trend$year), by = 2)
  ) +
  labs(x = "", y = "") +
  theme_zl()

#ggsave( "/Users/zerenli1992/Dropbox/Apps/Overleaf/cost_revolving_door_2020/rd_time_trend.png",height = 6, width = 9)

# Figure 4  Time Trends in CARs####
library(stats)
library(wesanderson)

# set theme
# Replaces the theme_zl() defined near the top of the script for every figure
# drawn from here on: theme_pubclean() at base size 15, legend below the
# panel with no title, no panel border or grid lines, and black axis lines.
# `...` is forwarded to theme(), matching the earlier theme_zl() and
# theme_map() signatures, so calls that pass extra theme elements keep working.
theme_zl <- function(...) {
  theme_pubclean(15) +
    theme(
      legend.position = "bottom",
      legend.title = element_blank(),
      panel.border = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      axis.line = element_line(colour = "black"),
      ...
    )
}


# Load the CAR data and replace the numeric `type` codes 1/2/3 with text
# labels; any other code becomes NA because case_when() has no fallback.
car_combine <- mutate(
  read_dta("~/Dropbox/political_risk/replication_files/car_combine.dta"),
  type = case_when(
    type == 1 ~ "Unconnected",
    type == 2 ~ "Survivors",
    type == 3 ~ "Losers"
  )
)

# Number of distinct Stkcd values in each type.
car_combine %>%
  distinct(Stkcd, type) %>%
  count(type)

# Mean CAR by `dif` and type, drawn as one line per type.
# The data are collapsed to one row per (dif, type) with summarise(); the
# previous mutate() kept every input row, so each line was drawn through many
# identical points. `se` is computed BEFORE `car` is replaced by its group
# mean -- in the old order sd() was taken over a constant and was always 0/NA.
# NOTE(review): `se` holds the within-group standard deviation, as in the
# original expression; divide by sqrt(n) if a standard error is intended.
# It is not mapped to any aesthetic in this plot.
car_combine %>%
  ungroup() %>%
  group_by(dif, type) %>%
  summarise(
    se = sd(car, na.rm = TRUE),
    car = mean(car, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  ggplot(aes(x = dif, y = car, group = type, linetype = type, color = type)) +
  geom_line(size = .8) +
  scale_y_continuous(labels = scales::percent) +
  labs(y = "CAR",
       x = "day t") +
  theme_zl() +
  scale_color_manual(values = c("#999999", "#56B4E9", "#E69F00"))

#ggsave( "/Users/zerenli1992/Dropbox/Apps/Overleaf/cost_revolving_door_2020/car.pdf")


#Figure 5: Dynamic Effect of Revolving-Door Recruitment####

# Axis labels for the eight timing dummies, listed in the same order in which
# the dummies enter the model formulas below.
var_label <- c(
  "In 3\n years",
  "In 2\n years",
  "In 1\n year",
  "For 1\n year",
  "For 2+\n years",
  "1 year\n ago",
  "2 years\n ago",
  "3 years\n ago"
)

# ROA on the timing dummies plus controls, 2012-2016 sample. In the felm()
# formula the parts after `|` are: fixed effects (symbol, year), no
# instruments (0), and clustering by symbol.
m1 <- felm(
  roa ~ pc_f3_dummy + pc_f2_dummy + pc_f1_dummy +
    pc_dummy_1 + pc_dummy_2 + pc_l1_dummy + pc_l2_dummy + pc_l3plus_dummy +
    size + incmope_l + npc + cppcc + cpc |
    symbol + year | 0 | symbol,
  data = filter(final_df, year %in% 2012:2016)
)
stargazer::stargazer(m1, type = "text")

# Same specification and sample with n_positive_r as the outcome.
m2 <- felm(
  n_positive_r ~ pc_f3_dummy + pc_f2_dummy + pc_f1_dummy +
    pc_dummy_1 + pc_dummy_2 + pc_l1_dummy + pc_l2_dummy + pc_l3plus_dummy +
    size + incmope_l + npc + cppcc + cpc |
    symbol + year | 0 | symbol,
  data = filter(final_df, year %in% 2012:2016)
)


# Coefficient plot for the timing dummies of a fitted model.
#
# m1: a fitted model whose summary()$coefficients matrix has the estimate in
#     column 1 and its standard error in column 2, with the timing dummies as
#     the first length(var_label) rows (the felm models in this script).
#
# Reads the global `var_label` for the x-axis labels and uses theme_zl().
# Returns a ggplot object: point estimates with +/- 1.96 * se ranges, a dashed
# zero line, and three shaded bands titled "Will Hire", "Currently Hire" and
# "Used to Hire". The band edges (3.5 and 5.5) are hard-coded for eight
# labels split 3 / 2 / 3.
flex_vis <- function(m1) {
  coef_table <- summary(m1)$coefficients
  n_terms <- length(var_label)
  if (nrow(coef_table) < n_terms || ncol(coef_table) < 2) {
    stop(
      "`m1` must report at least ", n_terms,
      " coefficients with standard errors.",
      call. = FALSE
    )
  }

  keep <- seq_len(n_terms)
  df <- tibble(
    coef = unname(coef_table[keep, 1]),
    se = unname(coef_table[keep, 2]),
    type = "ROA", # carried over from the original; not mapped to any aesthetic
    var = factor(var_label, levels = var_label)
  )

  # Background bands (in x positions of the discrete axis) and their titles.
  bands <- data.frame(
    xstart = c(0, 3.5, 5.5),
    xend = c(3.5, 5.5, 9),
    col = letters[1:3]
  )
  gtext <- data.frame(
    txt = c("Will Hire", "Currently Hire", "Used to Hire"),
    x = (bands$xstart + bands$xend) / 2,
    y = max(df$coef) * 4
  )

  # NOTE(review): the point-range layer is kept first, as in the original, so
  # that the x scale is set up from the factor `var` before the numeric band
  # coordinates are added -- confirm before reordering layers.
  # The former theme(legend.text = ...) call was dropped: theme_zl() is added
  # afterwards and the fill legend is switched off, so it had no visible
  # effect.
  ggplot(df) +
    geom_pointrange(
      aes(x = var, y = coef, ymin = coef - 1.96 * se, ymax = coef + 1.96 * se)
    ) +
    geom_rect(
      data = bands,
      aes(xmin = xstart, xmax = xend, ymin = -Inf, ymax = Inf, fill = col),
      alpha = .2
    ) +
    geom_hline(yintercept = 0, linetype = "dashed") +
    ylab("Estimate of Revolving Door") +
    xlab("") +
    scale_fill_manual(values = c("grey60", "white", "grey60")) +
    guides(fill = "none") +
    geom_text(data = gtext, aes(label = txt, x = x, y = y), size = 5) +
    theme_zl()
}

# Draw the coefficient plot for the ROA model, then write the most recent
# plot (ggsave()'s default) to the Overleaf folder.
flex_vis(m1)

ggsave(
  filename = "~/Dropbox/Apps/Overleaf/cost_revolving_door_2020/flex.pdf",
  height = 4.5,
  width = 7
)


#Figure 6: Investor sentiment visualization####
library(wesanderson)
library(viridis)
## Upper Panel
# NOTE(review): none of the library() calls visible in this script attaches a
# package that provides wordcloud() or brewer.pal(); confirm that the
# wordcloud / RColorBrewer packages are loaded before this section runs.
# The path uses "~/Dropbox" like every other path in the script instead of a
# user-specific absolute path.
sent_word <- read_rds("~/Dropbox/political_risk/replication_files/sent_words.rds")

positive_output <- sent_word %>% filter(positive == 1)
negative_output <- sent_word %>% filter(positive == 0)

# Word sizes follow the square root of each word's count `n`.
# brewer.pal() is asked for 9 colours, the maximum for the sequential "Reds"
# and "Blues" palettes; larger requests only trigger a warning and return the
# same 9 colours.
# NOTE(review): `lang` is passed through unchanged from the original call;
# verify that the wordcloud() in use actually accepts it.
pos <- wordcloud(words = positive_output$translatedContent,
                 scale = c(2, 1),
                 freq = sqrt(positive_output$n),
                 random.order = FALSE,
                 colors = brewer.pal(9, "Reds"),
                 lang = "english")

## negative sentiment
neg <- wordcloud(words = negative_output$translatedContent,
                 max.words = 100,
                 scale = c(2, 1),
                 freq = sqrt(negative_output$n),
                 random.order = FALSE,
                 lang = "english",
                 colors = brewer.pal(9, "Blues"))

colors <- brewer.pal(9, "Reds")


# histogram
# Distribution of n_positive for rows with year > 2011, on a log10 x-axis.
fig1 <- final_df2012 %>%
  mutate(pc_dummy = as.factor(pc_dummy)) %>%
  ggplot(mapping = aes(x = n_positive)) +
  geom_histogram(color = "black", fill = "tomato", alpha = .5) +
  scale_x_log10() +
  labs(x = "", y = "Number of Report") +
  theme_zl()

# Same plot for n_negative, built from the same year > 2011 subset.
fig2 <- filter(final_df, year > 2011) %>%
  mutate(pc_dummy = as.factor(pc_dummy)) %>%
  ggplot(mapping = aes(x = n_negative)) +
  geom_histogram(color = "black", fill = "steelblue1", alpha = .5) +
  scale_x_log10() +
  labs(x = "", y = "Number of Report") +
  theme_zl()

# Show both histograms side by side and save the most recent plot.
ggarrange(fig1, fig2)
ggsave(
  filename = "~/Dropbox/Apps/Overleaf/cost_revolving_door_2020/sent_hist.png",
  height = 3.5,
  width = 8
)



